A demo for SUMO Submission evaluation package

source("../GAA-EVAL.R")
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'dplyr' was built under R version 3.4.4
## Warning: package 'pROC' was built under R version 3.4.4
## Warning: package 'reshape2' was built under R version 3.4.3
## Warning: package 'ggrepel' was built under R version 3.4.4
## Warning: package 'gmodels' was built under R version 3.4.4

Read in the Experimental data provided by CAGI

exp.data <- read.RealData(file = "exp_data.csv", sep = "\t",
                             col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
length(exp.data$value)
exp.data.s <- read.RealData(file = "exp_data_s.csv", sep = ",",
                             col.id = 1, col.value = 2, col.sd = 3,na.character = 'NA')
head(exp.data.s$value)

Read in the submission folders

sub.data <- read.Submission.Folder(folder.name = "prediction/",col.id = 1,
                                      col.value = 2, col.sd = 3, real.data = exp.data)
sub.data.s = read.Submission.Folder(folder.name = "prediction/",col.id = 1,
                                      col.value = 2, col.sd = 3, real.data = exp.data.s)
sub.data = addGroup(sub.data,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))
sub.data.s = addGroup(sub.data.s,c(1,1,2,3,3,4,4,5,6,6,6,6,7,8,9,9))

ScatterPlot inspection

plot_all_scatter(real.data = exp.data, pred.data = sub.data, z.transform = TRUE)
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4894 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 4484 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).
## Warning: Removed 604 rows containing missing values (geom_point).
## Warning: Removed 5109 rows containing missing values (geom_segment).

Correlation-based Evaluation

# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
                                       method = "pearson", sd.use = NA,z.transform = TRUE)
head(result.cor.pearson)
##                            pearson.coefficient.n=219      p.value
## Group_39-prediction_file-1                 0.3165561 2.169579e-06
## Group_39-prediction_file-2                 0.3123934 2.992671e-06
## Group_40-prediction_file-1                 0.2796749 3.178925e-05
## Group_41-prediction_file-1                 0.3675506 2.799291e-08
## Group_41-prediction_file-2                 0.3675506 2.799291e-08
## Group_42-prediction_file-1                 0.1334869 5.062460e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")

RMSD-based Evaluation

result.rmsd1 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = TRUE)
head(result.rmsd1)
##                                 RMSD
## Group_39-prediction_file-1 1.0685672
## Group_39-prediction_file-2 0.9949796
## Group_40-prediction_file-1 1.1218989
## Group_41-prediction_file-1 1.5450756
## Group_41-prediction_file-2 1.5450756
## Group_42-prediction_file-1 1.6199197
plot.RMSD(result.rmsd1, method="")

# with variance.normalization
result.rmsd2 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = TRUE)
head(result.rmsd2)
##                                RMSD
## Group_39-prediction_file-1 1.094220
## Group_39-prediction_file-2 1.018284
## Group_40-prediction_file-1 1.138296
## Group_41-prediction_file-1 1.569258
## Group_41-prediction_file-2 1.569258
## Group_42-prediction_file-1 1.617173
plot.RMSD(result.rmsd2, method="")

# with variance.normalization
result.rmsd3 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = FALSE)
head(result.rmsd3)
##                                 RMSD
## Group_39-prediction_file-1 0.7423798
## Group_39-prediction_file-2 0.7086982
## Group_40-prediction_file-1 0.6115521
## Group_41-prediction_file-1 0.5694153
## Group_41-prediction_file-2 0.5694153
## Group_42-prediction_file-1 0.6404718
plot.RMSD(result.rmsd3, method="")

# with variance.normalization
result.rmsd4 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = FALSE)
head(result.rmsd4)
##                                 RMSD
## Group_39-prediction_file-1 0.7602017
## Group_39-prediction_file-2 0.7252975
## Group_40-prediction_file-1 0.6204900
## Group_41-prediction_file-1 0.5783273
## Group_41-prediction_file-2 0.5783273
## Group_42-prediction_file-1 0.6393859
plot.RMSD(result.rmsd4, method="")

Cut-off-based Evaluation

result.auc.0.4 <- eval.AUC(real.data = exp.data.s, pred.data = sub.data.s, 
                           threshold = 0.4,z.transform = T)
## [1] "Group_39-prediction_file-1"
## [1] "Group_39-prediction_file-2"
## [1] "Group_40-prediction_file-1"
## [1] "Group_41-prediction_file-1"
## [1] "Group_41-prediction_file-2"
## [1] "Group_42-prediction_file-1"
## [1] "Group_42-prediction_file-2"
## [1] "Group_43-prediction_file-1"
## [1] "Group_44-prediction_file-1"
## [1] "Group_44-prediction_file-2"
## [1] "Group_44-prediction_file-3"
## [1] "Group_44-prediction_file-4"
## [1] "Group_45-prediction_file-1"
## [1] "Group_46-prediction_file-1-late"
## [1] "Group_47-prediction_file-1"
## [1] "Group_47-prediction_file-2"
head(result.auc.0.4$results)
##                            AUC.0.4 sensitivity specificity accuracy
## Group_39-prediction_file-1  0.6968      0.5616      0.7254   0.6698
## Group_39-prediction_file-2  0.6938      0.5068      0.7465   0.6651
## Group_40-prediction_file-1  0.6953      0.3836      0.7746   0.6419
## Group_41-prediction_file-1  0.6753      0.7534      0.4789   0.5721
## Group_41-prediction_file-2  0.6753      0.7534      0.4789   0.5721
## Group_42-prediction_file-1  0.5918      0.3836      0.7113   0.6000
##                            precision f1_score  bAccu
## Group_39-prediction_file-1    0.5125   0.5359 0.6435
## Group_39-prediction_file-2    0.5068   0.5068 0.6267
## Group_40-prediction_file-1    0.4667   0.4211 0.5791
## Group_41-prediction_file-1    0.4264   0.5446 0.6161
## Group_41-prediction_file-2    0.4264   0.5446 0.6161
## Group_42-prediction_file-1    0.4058   0.3944 0.5474
plot.AUC(result.auc.0.4)

Between-method Evaluation

# for all the submission files
result.bM.pearson <-eval.Correlation.Between(real.data = exp.data.s, pred.data = sub.data.s,
                                              method = "pearson",sd.use = NA,z.transform = TRUE)
plot.Correlation.Between(result.bM.pearson$coefficient, method="pearson")

# for best submission of each group
result.bM.pearson <-eval.Correlation.Between(real.data = exp.data, pred.data = sub.data,
                                              method = "pearson",sd.use = NA,z.transform = TRUE,grouped = TRUE)
## Warning: package 'bindrcpp' was built under R version 3.4.4
plot.Correlation.Between(result.bM.pearson$coefficient, method="pearson")

Partial-Correlation Evaluation

# result.pCor <- eval.Partial.Correlation(real.data = exp.data, pred.data = sub.data, method = "spearman")
# plot.Correlation.Between(result.bM.spearman$coefficient, method="Spearman")

PCA Plot

length(exp.data.s$value)
## [1] 219
total = cbind(real = exp.data.s$value,sub.data.s$value)
Plot.PCA(na.omit(total), labels=F, legend=TRUE) 

Uniqueness Evaluation

# uniqueness as adj.r^2 difference between total linear model and linear models without certain group
result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data)
result.uniq
##      uniqueness
## 1 -0.0035299825
## 2 -0.0018514005
## 3  0.0072155217
## 4 -0.0019861941
## 5  0.0042271752
## 6  0.0014769835
## 7 -0.0013198760
## 8 -0.0006020648
## 9  0.0036086216
plot.uniqueness(result.uniq, method="")

result.uniq.s = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s)
sub.data.s$group$group
##  [1] 1 1 2 3 3 4 4 5 6 6 6 6 7 8 9 9
plot.uniqueness(result.uniq.s, method="")

Bootstrap Analysis

For Correlation-based Evaluation, provide mean, CI, and median pval

# 1. Render coefficient value
boot.result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
                                       method = "pearson", sd.use = NA,z.transform = TRUE,boot = T)
head(boot.result.cor.pearson)
##                                  avg     low_ci   high_ci         sd
## Group_39-prediction_file-1 0.3159587 0.21933427 0.4256732 0.06414406
## Group_39-prediction_file-2 0.3120020 0.21528626 0.4206262 0.06283910
## Group_40-prediction_file-1 0.2848922 0.19129022 0.3853243 0.05991413
## Group_41-prediction_file-1 0.3653808 0.27890482 0.4500688 0.05126986
## Group_41-prediction_file-2 0.3653808 0.27890482 0.4500688 0.05126986
## Group_42-prediction_file-1 0.1311390 0.01760134 0.2586587 0.07550710
##                                 p.value
## Group_39-prediction_file-1 2.668560e-06
## Group_39-prediction_file-2 3.399519e-06
## Group_40-prediction_file-1 2.732385e-05
## Group_41-prediction_file-1 2.218511e-08
## Group_41-prediction_file-2 2.218511e-08
## Group_42-prediction_file-1 6.216560e-02
# 2. Plot Correlation
plot.Correlation(boot.result.cor.pearson, "Pearson",boot = TRUE)

For RMSD-based Evaluation

# 1. Render coefficient value
result.cor.pearson <- eval.Correlation(real.data = exp.data.s, pred.data = sub.data.s,
                                       method = "pearson", sd.use = NA,z.transform = TRUE)
head(result.cor.pearson)
##                            pearson.coefficient.n=219      p.value
## Group_39-prediction_file-1                 0.3165561 2.169579e-06
## Group_39-prediction_file-2                 0.3123934 2.992671e-06
## Group_40-prediction_file-1                 0.2796749 3.178925e-05
## Group_41-prediction_file-1                 0.3675506 2.799291e-08
## Group_41-prediction_file-2                 0.3675506 2.799291e-08
## Group_42-prediction_file-1                 0.1334869 5.062460e-02
# 2. Plot Correlation
plot.Correlation(result.cor.pearson, "Pearson")

RMSD-based Evaluation

result.rmsd1 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = TRUE,boot = T)
head(result.rmsd1)
##                                RMSD    low_ci  high_ci         sd
## Group_39-prediction_file-1 1.077589 0.9747741 1.184645 0.06489218
## Group_39-prediction_file-2 1.001295 0.9116030 1.097849 0.05719669
## Group_40-prediction_file-1 1.127197 1.0409296 1.223458 0.05720155
## Group_41-prediction_file-1 1.567284 1.3940711 1.756550 0.11342868
## Group_41-prediction_file-2 1.567284 1.3940711 1.756550 0.11342868
## Group_42-prediction_file-1 1.630662 1.4583752 1.801228 0.10278278
plot.RMSD(result.rmsd1, method="")

# with variance.normalization
result.rmsd2 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = TRUE,boot = T)
head(result.rmsd2)
##                                RMSD    low_ci  high_ci         sd
## Group_39-prediction_file-1 1.099499 0.9941265 1.209010 0.06557091
## Group_39-prediction_file-2 1.023055 0.9290500 1.117659 0.05752699
## Group_40-prediction_file-1 1.145974 1.0572586 1.238049 0.05580197
## Group_41-prediction_file-1 1.578594 1.4014106 1.777544 0.11004123
## Group_41-prediction_file-2 1.578594 1.4014106 1.777544 0.11004123
## Group_42-prediction_file-1 1.630499 1.4693911 1.810466 0.10095696
plot.RMSD(result.rmsd2, method="")

# with variance.normalization
result.rmsd3 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = TRUE, density.distance.adjust = FALSE, variance.normalization = FALSE,boot = T)
head(result.rmsd3)
##                                 RMSD    low_ci   high_ci         sd
## Group_39-prediction_file-1 0.7391888 0.6719041 0.8039118 0.03901417
## Group_39-prediction_file-2 0.7055611 0.6410194 0.7671101 0.03995325
## Group_40-prediction_file-1 0.6079143 0.5474106 0.6713188 0.03665754
## Group_41-prediction_file-1 0.5675694 0.5260024 0.6066957 0.02454987
## Group_41-prediction_file-2 0.5675694 0.5260024 0.6066957 0.02454987
## Group_42-prediction_file-1 0.6366967 0.5786906 0.6980475 0.03634241
plot.RMSD(result.rmsd3, method="")

# with variance.normalization
result.rmsd4 <- eval.RMSD(real.data = exp.data.s, pred.data = sub.data.s,sd.use = NA, 
                      density.distance = FALSE, density.distance.adjust = FALSE, variance.normalization = FALSE,boot = T)
head(result.rmsd4)
##                                 RMSD    low_ci   high_ci         sd
## Group_39-prediction_file-1 0.7609928 0.6960201 0.8240467 0.03954468
## Group_39-prediction_file-2 0.7258915 0.6564268 0.7901490 0.04020173
## Group_40-prediction_file-1 0.6217584 0.5682989 0.6789840 0.03488304
## Group_41-prediction_file-1 0.5792009 0.5373457 0.6190344 0.02433503
## Group_41-prediction_file-2 0.5792009 0.5373457 0.6190344 0.02433503
## Group_42-prediction_file-1 0.6404705 0.5874168 0.6999961 0.03458017
plot.RMSD(result.rmsd4, method="")

For Uniqueness Evaluation

result.uniq = eval.uniqueness(real.data = exp.data, pred.data = sub.data,boot = TRUE)
result.uniq
##     uniqueness       low_ci    high_ci          sd
## 1 0.0008588146 -0.004117093 0.01558673 0.006794240
## 2 0.0017450875 -0.004095540 0.01830906 0.007176755
## 3 0.0104885219 -0.003393715 0.03487135 0.012783660
## 4 0.0026437797 -0.003987992 0.01989985 0.009027008
## 5 0.0069503033 -0.003842305 0.02707928 0.010213371
## 6 0.0067178900 -0.004039506 0.03145771 0.012257302
## 7 0.0019218868 -0.003991726 0.01654487 0.007345860
## 8 0.0035549520 -0.003949144 0.02356930 0.009393296
## 9 0.0065076136 -0.003900567 0.02681468 0.011319813
plot.uniqueness(result.uniq, method="",boot = TRUE)

result.uniq.s = eval.uniqueness(real.data = exp.data.s, pred.data = sub.data.s,boot = TRUE)
result.uniq.s
##     uniqueness       low_ci    high_ci          sd
## 1 0.0008539077 -0.003936736 0.01557216 0.006578812
## 2 0.0024231333 -0.003922784 0.02120193 0.008289418
## 3 0.0101985701 -0.003271067 0.03210386 0.011488873
## 4 0.0027188455 -0.003884709 0.02252701 0.009202256
## 5 0.0072641114 -0.003777335 0.02947057 0.011570498
## 6 0.0064521426 -0.003834966 0.02913687 0.011782762
## 7 0.0020569491 -0.003946284 0.01895548 0.007524355
## 8 0.0030298598 -0.003899808 0.01901317 0.008519997
## 9 0.0061782886 -0.003839796 0.02801771 0.010706875
plot.uniqueness(result.uniq.s, method="",boot = TRUE)